import pandas as pd
from pdf2excelv2.get_attributes import out2file

# Build "recommendation opinion" (推荐意见) training samples from an Excel sheet.
def get_opinion_train(file_path):
    """Build question/answer training samples from the opinion columns of a sheet.

    The workbook is read with ``pandas.read_excel`` (``openpyxl`` engine). For
    every row, the columns ``推荐意见1``, ``推荐意见2``, ... are scanned in order
    and scanning of that row stops at the first empty (NaN) cell. Each
    non-empty cell yields one sample whose ``ask`` embeds the cell text and
    whose ``answer`` is the fixed label ``'推荐意见'``.

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        A list of ``{"ask": ..., "answer": ...}`` dicts with duplicates
        removed, in order of first appearance.

    Raises:
        KeyError: If the sheet has rows but no ``推荐意见1`` column.
    """
    df = pd.read_excel(file_path, engine='openpyxl')

    # Discover the consecutively numbered opinion columns that really exist,
    # so a fully populated row never indexes past the last column.
    opinion_columns = []
    number = 1
    while '推荐意见{}'.format(number) in df.columns:
        opinion_columns.append('推荐意见{}'.format(number))
        number += 1

    # A sheet with data but without the first opinion column is malformed;
    # keep signalling that with KeyError instead of silently returning [].
    if not opinion_columns and len(df.index) > 0:
        raise KeyError('推荐意见1')

    seen_questions = set()
    samples = []
    for _, row in df.iterrows():
        for column in opinion_columns:
            text = row[column]
            if pd.isna(text):
                # Opinions are filled left to right; the first gap ends the row.
                break
            question = "根据文本内容:“" + str(text) + "”回答实体类型是什么"
            # The answer is constant, so the question alone identifies a sample.
            if question in seen_questions:
                continue
            seen_questions.add(question)
            samples.append({"ask": question, "answer": '推荐意见'})
    return samples


def get_train_data(file_path='../data/中文指南推荐意见.xlsx',
                   out_name='opinion_traindata001'):
    """Generate the opinion training samples and pass them to ``out2file``.

    Args:
        file_path: Excel workbook handed to ``get_opinion_train``. The default
            is a relative path, so it is resolved against the current working
            directory of the process.
        out_name: First argument forwarded to ``out2file`` (presumably the
            output name -- confirm against ``out2file``'s definition).
    """
    res = get_opinion_train(file_path)
    out2file(out_name, res)
# Script entry point: build and write the training data when run directly.
if __name__ == '__main__':
    get_train_data()